
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
# os.listdir() makes no ordering guarantee; sort the names so that any
# positional lookup into `files` is deterministic from run to run.
files=sorted(os.listdir(r"C:\Users\Nicholas Ra\Covid-19"))
files
['country_wise_latest.csv', 'covid_19_clean_complete.csv', 'day_wise.csv', 'full_grouped.csv', 'usa_country_wise.csv', 'worldometer_data.csv']
def read_data(path,filename):
    """Load one CSV file from a directory into a pandas DataFrame.

    Args:
        path: Directory that contains the file.
        filename: Name of the CSV file inside ``path``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that file.
    """
    # os.path.join picks the platform separator and copes with a trailing
    # separator on ``path``, unlike manual '/' concatenation.
    return pd.read_csv(os.path.join(path,filename))
# Directory that holds the COVID-19 CSV files.
path = r"C:\Users\Nicholas Ra\Covid-19"

# Load the per-country table and preview its first rows.
world_data = read_data(path=path, filename='worldometer_data.csv')
world_data.head()
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 3.311981e+08 | 5032179 | NaN | 162804.0 | NaN | 2576668.0 | NaN | 2292707.0 | 18296.0 | 15194.0 | 492.0 | 63139605.0 | 190640.0 | Americas |
| 1 | Brazil | South America | 2.127107e+08 | 2917562 | NaN | 98644.0 | NaN | 2047660.0 | NaN | 771258.0 | 8318.0 | 13716.0 | 464.0 | 13206188.0 | 62085.0 | Americas |
| 2 | India | Asia | 1.381345e+09 | 2025409 | NaN | 41638.0 | NaN | 1377384.0 | NaN | 606387.0 | 8944.0 | 1466.0 | 30.0 | 22149351.0 | 16035.0 | South-EastAsia |
| 3 | Russia | Europe | 1.459409e+08 | 871894 | NaN | 14606.0 | NaN | 676357.0 | NaN | 180931.0 | 2300.0 | 5974.0 | 100.0 | 29716907.0 | 203623.0 | Europe |
| 4 | South Africa | Africa | 5.938157e+07 | 538184 | NaN | 9604.0 | NaN | 387316.0 | NaN | 141264.0 | 539.0 | 9063.0 | 162.0 | 3149807.0 | 53044.0 | Africa |
# Load each remaining dataset by its explicit file name rather than by its
# position in `files`: os.listdir() order is not guaranteed, so a positional
# index can silently pick up the wrong CSV.
day_wise=read_data(path,'day_wise.csv')
group_data=read_data(path,'full_grouped.csv')
usa_data=read_data(path,'usa_country_wise.csv')
province_data=read_data(path,'covid_19_clean_complete.csv')
province_data.shape
(49068, 10)
world_data.head()
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 3.311981e+08 | 5032179 | NaN | 162804.0 | NaN | 2576668.0 | NaN | 2292707.0 | 18296.0 | 15194.0 | 492.0 | 63139605.0 | 190640.0 | Americas |
| 1 | Brazil | South America | 2.127107e+08 | 2917562 | NaN | 98644.0 | NaN | 2047660.0 | NaN | 771258.0 | 8318.0 | 13716.0 | 464.0 | 13206188.0 | 62085.0 | Americas |
| 2 | India | Asia | 1.381345e+09 | 2025409 | NaN | 41638.0 | NaN | 1377384.0 | NaN | 606387.0 | 8944.0 | 1466.0 | 30.0 | 22149351.0 | 16035.0 | South-EastAsia |
| 3 | Russia | Europe | 1.459409e+08 | 871894 | NaN | 14606.0 | NaN | 676357.0 | NaN | 180931.0 | 2300.0 | 5974.0 | 100.0 | 29716907.0 | 203623.0 | Europe |
| 4 | South Africa | Africa | 5.938157e+07 | 538184 | NaN | 9604.0 | NaN | 387316.0 | NaN | 141264.0 | 539.0 | 9063.0 | 162.0 | 3149807.0 | 53044.0 | Africa |
world_data.columns
Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
'TotalTests', 'Tests/1M pop', 'WHO Region'],
dtype='object')
conda install -c plotly plotly_express==0.4.0
Collecting package metadata (current_repodata.json): ...working... done
Note: you may need to restart the kernel to use updated packages.
Solving environment: ...working... done
## Package Plan ##
environment location: C:\Anaconda3
added / updated specs:
- plotly_express==0.4.0
The following packages will be downloaded:
package | build
---------------------------|-----------------
plotly-4.14.3 | py_0 5.9 MB plotly
plotly_express-0.4.0 | py_0 5 KB plotly
------------------------------------------------------------
Total: 5.9 MB
The following NEW packages will be INSTALLED:
plotly plotly/noarch::plotly-4.14.3-py_0
plotly_express plotly/noarch::plotly_express-0.4.0-py_0
retrying pkgs/main/noarch::retrying-1.3.3-py_2
Downloading and Extracting Packages
plotly-4.14.3 | 5.9 MB | | 0%
plotly-4.14.3 | 5.9 MB | ########## | 100%
plotly-4.14.3 | 5.9 MB | ########## | 100%
plotly_express-0.4.0 | 5 KB | | 0%
plotly_express-0.4.0 | 5 KB | ########## | 100%
plotly_express-0.4.0 | 5 KB | ########## | 100%
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done
import plotly.express as px
# Draw one treemap per metric for the first 20 rows of world_data.
columns=['TotalCases','TotalDeaths','TotalRecovered','ActiveCases']
for i in columns:
    fig=px.treemap(
        world_data.iloc[0:20],
        values=i,
        path=['Country/Region'],
        # The space before the placeholder keeps the metric name from being
        # glued onto "their" in the rendered title.
        title='Treemap representative of different countries w.r.t to their {}'.format(i),
    )
    fig.show()
day_wise.head()
| Date | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | Deaths / 100 Cases | Recovered / 100 Cases | Deaths / 100 Recovered | No. of countries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | 555 | 17 | 28 | 510 | 0 | 0 | 0 | 3.06 | 5.05 | 60.71 | 6 |
| 1 | 2020-01-23 | 654 | 18 | 30 | 606 | 99 | 1 | 2 | 2.75 | 4.59 | 60.00 | 8 |
| 2 | 2020-01-24 | 941 | 26 | 36 | 879 | 287 | 8 | 6 | 2.76 | 3.83 | 72.22 | 9 |
| 3 | 2020-01-25 | 1434 | 42 | 39 | 1353 | 493 | 16 | 3 | 2.93 | 2.72 | 107.69 | 11 |
| 4 | 2020-01-26 | 2118 | 56 | 52 | 2010 | 684 | 14 | 13 | 2.64 | 2.46 | 107.69 | 13 |
day_wise.columns
Index(['Date', 'Confirmed', 'Deaths', 'Recovered', 'Active', 'New cases',
'New deaths', 'New recovered', 'Deaths / 100 Cases',
'Recovered / 100 Cases', 'Deaths / 100 Recovered', 'No. of countries'],
dtype='object')
# Day-wise totals, one line per series, on a dark theme.
daily_series = ['Confirmed', 'Deaths', 'Recovered', 'Active']
px.line(
    day_wise,
    x="Date",
    y=daily_series,
    title='covid cases w.r.t to date',
    template='plotly_dark',
)
world_data.head()
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 3.311981e+08 | 5032179 | NaN | 162804.0 | NaN | 2576668.0 | NaN | 2292707.0 | 18296.0 | 15194.0 | 492.0 | 63139605.0 | 190640.0 | Americas |
| 1 | Brazil | South America | 2.127107e+08 | 2917562 | NaN | 98644.0 | NaN | 2047660.0 | NaN | 771258.0 | 8318.0 | 13716.0 | 464.0 | 13206188.0 | 62085.0 | Americas |
| 2 | India | Asia | 1.381345e+09 | 2025409 | NaN | 41638.0 | NaN | 1377384.0 | NaN | 606387.0 | 8944.0 | 1466.0 | 30.0 | 22149351.0 | 16035.0 | South-EastAsia |
| 3 | Russia | Europe | 1.459409e+08 | 871894 | NaN | 14606.0 | NaN | 676357.0 | NaN | 180931.0 | 2300.0 | 5974.0 | 100.0 | 29716907.0 | 203623.0 | Europe |
| 4 | South Africa | Africa | 5.938157e+07 | 538184 | NaN | 9604.0 | NaN | 387316.0 | NaN | 141264.0 | 539.0 | 9063.0 | 162.0 | 3149807.0 | 53044.0 | Africa |
# Slice both operands to the same first 20 rows before dividing, so the ratio
# carries no NaN padding from index alignment and needs no second slice.
top20=world_data.iloc[0:20]
population_test_ratio=top20['Population']/top20['TotalTests']
fig=px.bar(top20,x='Country/Region',y=population_test_ratio,color='Country/Region',title='Population to tests done ratio')
fig.show()
world_data.columns
Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
'TotalTests', 'Tests/1M pop', 'WHO Region'],
dtype='object')
# 'Serious,Critical' is a single column of world_data (the comma is part of
# its name); 'Serious' and 'Critical' do not exist as separate columns.
fig=px.bar(world_data.iloc[0:6],x='Country/Region',y=['Serious,Critical','TotalDeaths','TotalRecovered','ActiveCases','TotalCases'])
fig.show()
# Horizontal bar chart of TotalCases for the first 20 rows of world_data.
leading_rows = world_data.iloc[0:20]
fig = px.bar(
    leading_rows,
    y='Country/Region',
    x='TotalCases',
    color='TotalCases',
    text='TotalCases',
)
fig.update_layout(
    template='plotly_dark',
    title_text='Top 20 countries of total confirmed cases',
)
fig.show()
world_data.sort_values(by='TotalDeaths',ascending=False)
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 3.311981e+08 | 5032179 | NaN | 162804.0 | NaN | 2576668.0 | NaN | 2292707.0 | 18296.0 | 15194.0 | 492.0 | 63139605.0 | 190640.0 | Americas |
| 1 | Brazil | South America | 2.127107e+08 | 2917562 | NaN | 98644.0 | NaN | 2047660.0 | NaN | 771258.0 | 8318.0 | 13716.0 | 464.0 | 13206188.0 | 62085.0 | Americas |
| 5 | Mexico | North America | 1.290662e+08 | 462690 | 6590.0 | 50517.0 | 819.0 | 308848.0 | 4140.0 | 103325.0 | 3987.0 | 3585.0 | 391.0 | 1056915.0 | 8189.0 | Americas |
| 11 | UK | Europe | 6.792203e+07 | 308134 | NaN | 46413.0 | NaN | NaN | NaN | NaN | 73.0 | 4537.0 | 683.0 | 17515234.0 | 257873.0 | Europe |
| 2 | India | Asia | 1.381345e+09 | 2025409 | NaN | 41638.0 | NaN | 1377384.0 | NaN | 606387.0 | 8944.0 | 1466.0 | 30.0 | 22149351.0 | 16035.0 | South-EastAsia |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 202 | Saint Kitts and Nevis | North America | 5.323700e+04 | 17 | NaN | NaN | NaN | 16.0 | NaN | 1.0 | NaN | 319.0 | NaN | 1146.0 | 21526.0 | Americas |
| 203 | Greenland | North America | 5.678000e+04 | 14 | NaN | NaN | NaN | 14.0 | NaN | 0.0 | NaN | 247.0 | NaN | 5977.0 | 105266.0 | Europe |
| 205 | Caribbean Netherlands | North America | 2.624700e+04 | 13 | NaN | NaN | NaN | 7.0 | NaN | 6.0 | NaN | 495.0 | NaN | 424.0 | 16154.0 | NaN |
| 206 | Falkland Islands | South America | 3.489000e+03 | 13 | NaN | NaN | NaN | 13.0 | NaN | 0.0 | NaN | 3726.0 | NaN | 1816.0 | 520493.0 | NaN |
| 207 | Vatican City | Europe | 8.010000e+02 | 12 | NaN | NaN | NaN | 12.0 | NaN | 0.0 | NaN | 14981.0 | NaN | NaN | NaN | Europe |
209 rows × 16 columns
# Rank rows by TotalDeaths (highest first) and chart the leading 20.
deaths_ranked = world_data.sort_values(by='TotalDeaths', ascending=False)
fig = px.bar(
    deaths_ranked[0:20],
    y='Country/Region',
    x='TotalDeaths',
    color='TotalDeaths',
    text='TotalDeaths',
)
fig.update_layout(
    template='plotly_dark',
    title_text='Top 20 countries of total deaths cases',
)
fig.show()
world_data.columns
Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
'TotalTests', 'Tests/1M pop', 'WHO Region'],
dtype='object')
# Ranked top-20 bar charts for two more metrics: active cases, then recovered.
ranked_charts = (
    ('ActiveCases', 'Top 20 countries of total active cases'),
    ('TotalRecovered', 'Top 20 countries of total recovered cases'),
)
for metric, chart_title in ranked_charts:
    ranked = world_data.sort_values(by=metric, ascending=False)[0:20]
    fig = px.bar(ranked, y='Country/Region', x=metric, color=metric, text=metric)
    fig.update_layout(template='plotly_dark', title_text=chart_title)
    fig.show()
world_data.head()
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 3.311981e+08 | 5032179 | NaN | 162804.0 | NaN | 2576668.0 | NaN | 2292707.0 | 18296.0 | 15194.0 | 492.0 | 63139605.0 | 190640.0 | Americas |
| 1 | Brazil | South America | 2.127107e+08 | 2917562 | NaN | 98644.0 | NaN | 2047660.0 | NaN | 771258.0 | 8318.0 | 13716.0 | 464.0 | 13206188.0 | 62085.0 | Americas |
| 2 | India | Asia | 1.381345e+09 | 2025409 | NaN | 41638.0 | NaN | 1377384.0 | NaN | 606387.0 | 8944.0 | 1466.0 | 30.0 | 22149351.0 | 16035.0 | South-EastAsia |
| 3 | Russia | Europe | 1.459409e+08 | 871894 | NaN | 14606.0 | NaN | 676357.0 | NaN | 180931.0 | 2300.0 | 5974.0 | 100.0 | 29716907.0 | 203623.0 | Europe |
| 4 | South Africa | Africa | 5.938157e+07 | 538184 | NaN | 9604.0 | NaN | 387316.0 | NaN | 141264.0 | 539.0 | 9063.0 | 162.0 | 3149807.0 | 53044.0 | Africa |
# One donut chart per metric for the first 15 rows of world_data; slices are
# named by 'Country/Region'.
# NOTE(review): the title mentions "WHO region" although slices are countries
# - consider rewording.
labels=world_data[0:15]['Country/Region'].values
cases=['TotalCases','TotalDeaths','TotalRecovered','ActiveCases']
for i in cases:
    # The space after the placeholder separates the metric name from
    # "recorded" in the rendered title.
    fig=px.pie(world_data[0:15],values=i,names=labels,hole=0.3,title="{} recorded w.r.t to WHO region of 15 worst affected countries".format(i))
    fig.show()
world_data.head()
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 3.311981e+08 | 5032179 | NaN | 162804.0 | NaN | 2576668.0 | NaN | 2292707.0 | 18296.0 | 15194.0 | 492.0 | 63139605.0 | 190640.0 | Americas |
| 1 | Brazil | South America | 2.127107e+08 | 2917562 | NaN | 98644.0 | NaN | 2047660.0 | NaN | 771258.0 | 8318.0 | 13716.0 | 464.0 | 13206188.0 | 62085.0 | Americas |
| 2 | India | Asia | 1.381345e+09 | 2025409 | NaN | 41638.0 | NaN | 1377384.0 | NaN | 606387.0 | 8944.0 | 1466.0 | 30.0 | 22149351.0 | 16035.0 | South-EastAsia |
| 3 | Russia | Europe | 1.459409e+08 | 871894 | NaN | 14606.0 | NaN | 676357.0 | NaN | 180931.0 | 2300.0 | 5974.0 | 100.0 | 29716907.0 | 203623.0 | Europe |
| 4 | South Africa | Africa | 5.938157e+07 | 538184 | NaN | 9604.0 | NaN | 387316.0 | NaN | 141264.0 | 539.0 | 9063.0 | 162.0 | 3149807.0 | 53044.0 | Africa |
# Row-wise ratio of total deaths to total cases.
deaths_to_confirmed = world_data['TotalDeaths'].div(world_data['TotalCases'])
deaths_to_confirmed
0 0.032353
1 0.033810
2 0.020558
3 0.016752
4 0.017845
...
204 0.076923
205 NaN
206 NaN
207 NaN
208 0.100000
Length: 209, dtype: float64
px.bar(world_data,x='Country/Region',y=deaths_to_confirmed,title='Deaths to confirmed ratio of worst affected countries')
deaths_to_recovered=world_data['TotalDeaths']/world_data['TotalRecovered']
# Plot deaths_to_recovered so the chart matches its title; passing
# deaths_to_confirmed here would just repeat the figure above.
px.bar(world_data,x='Country/Region',y=deaths_to_recovered,title='Deaths to recovered ratio of worst affected countries')
world_data['Serious,Critical']/world_data['TotalDeaths']
0 0.112381
1 0.084323
2 0.214804
3 0.157470
4 0.056122
...
204 NaN
205 NaN
206 NaN
207 NaN
208 NaN
Length: 209, dtype: float64
# Row-wise ratio of serious/critical cases to total deaths, charted per row.
serious_to_deaths = world_data['Serious,Critical'] / world_data['TotalDeaths']
px.bar(
    world_data,
    x='Country/Region',
    y=serious_to_deaths,
    title='Serious/Critical to deaths ratio of worst affected countries',
)
group_data.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Eastern Mediterranean |
| 1 | 2020-01-22 | Albania | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Europe |
| 2 | 2020-01-22 | Algeria | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Africa |
| 3 | 2020-01-22 | Andorra | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Europe |
| 4 | 2020-01-22 | Angola | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Africa |
from plotly.subplots import make_subplots
import plotly.graph_objects as go
def country_visualization(group_data,country):
    """Show confirmed, deaths, recovered and active curves for one country.

    Args:
        group_data: DataFrame with 'Country/Region', 'Date', 'Confirmed',
            'Deaths', 'Recovered' and 'Active' columns.
        country: Value of 'Country/Region' whose rows are plotted.

    Returns:
        None. A 1x4 subplot figure is displayed via ``fig.show()``.
    """
    # Single source of truth for panel order: column k shows metrics[k-1].
    metrics=('Confirmed','Deaths','Recovered','Active')
    data=group_data[group_data['Country/Region']==country]
    df=data.loc[:,['Date',*metrics]]
    # Titles come from the same tuple that places the traces, so each panel
    # heading always names the series drawn beneath it.
    fig=make_subplots(rows=1,cols=len(metrics),subplot_titles=metrics)
    for col,metric in enumerate(metrics,start=1):
        fig.add_trace(
            go.Scatter(name=metric,x=df['Date'],y=df[metric]),row=1,col=col)
    fig.update_layout(height=600,width=1000,title_text='Date vs Recorded cases of {}'.format(country),template="plotly_dark")
    fig.show()
# Render the per-country chart for a handful of example countries.
for country_name in ('Brazil', 'US', 'Romania', 'Russia'):
    country_visualization(group_data, country_name)